Preliminaries: load required packages and perform merges
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6 ✔ purrr 0.3.4
## ✔ tibble 3.1.7 ✔ dplyr 1.0.9
## ✔ tidyr 1.2.0 ✔ stringr 1.4.0
## ✔ readr 2.1.2 ✔ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## Loading required package: Hmisc
##
## Loading required package: lattice
##
## Loading required package: survival
##
## Loading required package: Formula
##
##
## Attaching package: 'Hmisc'
##
##
## The following objects are masked from 'package:dplyr':
##
## src, summarize
##
##
## The following objects are masked from 'package:base':
##
## format.pval, units
##
##
##
## Attaching package: 'caret'
##
##
## The following object is masked from 'package:survival':
##
## cluster
##
##
## The following object is masked from 'package:purrr':
##
## lift
##
##
##
## Attaching package: 'xgboost'
##
##
## The following object is masked from 'package:dplyr':
##
## slice
##
##
## Loading required package: foreach
##
##
## Attaching package: 'foreach'
##
##
## The following objects are masked from 'package:purrr':
##
## accumulate, when
##
##
## Loading required package: iterators
##
## Loading required package: parallel
##
##
## Attaching package: 'pdp'
##
##
## The following object is masked from 'package:purrr':
##
## partial
rm(list = ls())
library(tidyverse)
library(readxl)
filenames <- list.files("./Ranked Measure Data2")
filenames
## [1] "2015.xls" "2016.xls" "2017.xls" "2018.xls" "2019.xls"
RMD <- data.frame(matrix(ncol = 0, nrow = 0))
for (filename in filenames){
yearnum <- gsub(".xls", "", filename)
RMD = RMD %>% bind_rows(assign(paste0("RMD",yearnum), read_excel(paste0("./Ranked Measure Data2/", filename), sheet="Ranked Measure Data", skip = 1) %>%
mutate(year=yearnum) %>%
select(FIPS, year, YPLLRateLow, YPLLRateHigh)))
}
## Warning: Expecting numeric in CB3145 / R3145C80: got '^This data was updated on March 29, 2017. Please see http://www.countyhealthrankings.org/content/data-changes for more information.'
## New names:
## • `Quartile` -> `Quartile...8`
## • `Sample Size` -> `Sample Size...9`
## • `95% CI - Low` -> `95% CI - Low...11`
## • `95% CI - High` -> `95% CI - High...12`
## • `Quartile` -> `Quartile...13`
## • `Sample Size` -> `Sample Size...14`
## • `95% CI - Low` -> `95% CI - Low...16`
## • `95% CI - High` -> `95% CI - High...17`
## • `Quartile` -> `Quartile...18`
## • `Sample Size` -> `Sample Size...19`
## • `95% CI - Low` -> `95% CI - Low...21`
## • `95% CI - High` -> `95% CI - High...22`
## • `Quartile` -> `Quartile...23`
## • `95% CI - Low` -> `95% CI - Low...28`
## • `95% CI - High` -> `95% CI - High...29`
## • `Quartile` -> `Quartile...30`
## • `Sample Size` -> `Sample Size...31`
## • `95% CI - Low` -> `95% CI - Low...33`
## • `95% CI - High` -> `95% CI - High...34`
## • `Quartile` -> `Quartile...35`
## • `95% CI - Low` -> `95% CI - Low...37`
## • `95% CI - High` -> `95% CI - High...38`
## • `Quartile` -> `Quartile...39`
## • `Quartile` -> `Quartile...41`
## • `95% CI - Low` -> `95% CI - Low...43`
## • `95% CI - High` -> `95% CI - High...44`
## • `Quartile` -> `Quartile...45`
## • `Quartile` -> `Quartile...48`
## • `Sample Size` -> `Sample Size...49`
## • `95% CI - Low` -> `95% CI - Low...51`
## • `95% CI - High` -> `95% CI - High...52`
## • `Quartile` -> `Quartile...53`
## • `Quartile` -> `Quartile...57`
## • `Quartile` -> `Quartile...60`
## • `95% CI - Low` -> `95% CI - Low...64`
## • `95% CI - High` -> `95% CI - High...65`
## • `Quartile` -> `Quartile...66`
## • `95% CI - Low` -> `95% CI - Low...69`
## • `95% CI - High` -> `95% CI - High...70`
## • `Quartile` -> `Quartile...71`
## • `Quartile` -> `Quartile...75`
## • `Quartile` -> `Quartile...79`
## • `# Mental Health Providers` -> `# Mental Health Providers...80`
## • `MHP Rate` -> `MHP Rate...81`
## • `MHP Ratio` -> `MHP Ratio...82`
## • `# Mental Health Providers` -> `# Mental Health Providers...83`
## • `MHP Rate` -> `MHP Rate...84`
## • `MHP Ratio` -> `MHP Ratio...85`
## • `Quartile` -> `Quartile...86`
## • `# Medicare Enrollees` -> `# Medicare Enrollees...87`
## • `95% CI - Low` -> `95% CI - Low...89`
## • `95% CI - High` -> `95% CI - High...90`
## • `Quartile` -> `Quartile...91`
## • `95% CI - Low` -> `95% CI - Low...94`
## • `95% CI - High` -> `95% CI - High...95`
## • `Quartile` -> `Quartile...96`
## • `# Medicare Enrollees` -> `# Medicare Enrollees...97`
## • `95% CI - Low` -> `95% CI - Low...99`
## • `95% CI - High` -> `95% CI - High...100`
## • `Quartile` -> `Quartile...101`
## • `Quartile` -> `Quartile...104`
## • `95% CI - Low` -> `95% CI - Low...108`
## • `95% CI - High` -> `95% CI - High...109`
## • `Quartile` -> `Quartile...110`
## • `Quartile` -> `Quartile...114`
## • `95% CI - Low` -> `95% CI - Low...117`
## • `95% CI - High` -> `95% CI - High...118`
## • `Quartile` -> `Quartile...119`
## • `Quartile` -> `Quartile...123`
## • `95% CI - Low` -> `95% CI - Low...127`
## • `95% CI - High` -> `95% CI - High...128`
## • `Quartile` -> `Quartile...129`
## • `Quartile` -> `Quartile...132`
## • `Quartile` -> `Quartile...135`
## • `95% CI - Low` -> `95% CI - Low...138`
## • `95% CI - High` -> `95% CI - High...139`
## • `Quartile` -> `Quartile...140`
## • `Quartile` -> `Quartile...142`
## • `Quartile` -> `Quartile...145`
## • `95% CI - Low` -> `95% CI - Low...148`
## • `95% CI - High` -> `95% CI - High...149`
## • `Quartile` -> `Quartile...150`
## • `95% CI - Low` -> `95% CI - Low...154`
## • `95% CI - High` -> `95% CI - High...155`
## • `Quartile` -> `Quartile...156`
## • `95% CI - Low` -> `95% CI - Low...159`
## • `95% CI - High` -> `95% CI - High...160`
## • `Quartile` -> `Quartile...161`
## Warning: Expecting numeric in BX3145 / R3145C76: got '^This data was updated on March 29, 2017. Please see http://www.countyhealthrankings.org/content/data-changes for more information.'
## New names:
## New names:
## • `Quartile` -> `Quartile...8`
## • `95% CI - Low` -> `95% CI - Low...10`
## • `95% CI - High` -> `95% CI - High...11`
## • `Quartile` -> `Quartile...12`
## • `95% CI - Low` -> `95% CI - Low...14`
## • `95% CI - High` -> `95% CI - High...15`
## • `Quartile` -> `Quartile...16`
## • `95% CI - Low` -> `95% CI - Low...18`
## • `95% CI - High` -> `95% CI - High...19`
## • `Quartile` -> `Quartile...20`
## • `95% CI - Low` -> `95% CI - Low...25`
## • `95% CI - High` -> `95% CI - High...26`
## • `Quartile` -> `Quartile...27`
## • `95% CI - Low` -> `95% CI - Low...29`
## • `95% CI - High` -> `95% CI - High...30`
## • `Quartile` -> `Quartile...31`
## • `95% CI - Low` -> `95% CI - Low...33`
## • `95% CI - High` -> `95% CI - High...34`
## • `Quartile` -> `Quartile...35`
## • `Quartile` -> `Quartile...37`
## • `95% CI - Low` -> `95% CI - Low...39`
## • `95% CI - High` -> `95% CI - High...40`
## • `Quartile` -> `Quartile...41`
## • `Quartile` -> `Quartile...43`
## • `95% CI - Low` -> `95% CI - Low...45`
## • `95% CI - High` -> `95% CI - High...46`
## • `Quartile` -> `Quartile...47`
## • `95% CI - Low` -> `95% CI - Low...51`
## • `95% CI - High` -> `95% CI - High...52`
## • `Quartile` -> `Quartile...53`
## • `Quartile` -> `Quartile...56`
## • `95% CI - Low` -> `95% CI - Low...60`
## • `95% CI - High` -> `95% CI - High...61`
## • `Quartile` -> `Quartile...62`
## • `95% CI - Low` -> `95% CI - Low...65`
## • `95% CI - High` -> `95% CI - High...66`
## • `Quartile` -> `Quartile...67`
## • `Quartile` -> `Quartile...71`
## • `Quartile` -> `Quartile...75`
## • `# Mental Health Providers` -> `# Mental Health Providers...76`
## • `MHP Rate` -> `MHP Rate...77`
## • `MHP Ratio` -> `MHP Ratio...78`
## • `# Mental Health Providers` -> `# Mental Health Providers...79`
## • `MHP Rate` -> `MHP Rate...80`
## • `MHP Ratio` -> `MHP Ratio...81`
## • `Quartile` -> `Quartile...82`
## • `# Medicare Enrollees` -> `# Medicare Enrollees...83`
## • `95% CI - Low` -> `95% CI - Low...85`
## • `95% CI - High` -> `95% CI - High...86`
## • `Quartile` -> `Quartile...87`
## • `95% CI - Low` -> `95% CI - Low...90`
## • `95% CI - High` -> `95% CI - High...91`
## • `Quartile` -> `Quartile...92`
## • `# Medicare Enrollees` -> `# Medicare Enrollees...93`
## • `95% CI - Low` -> `95% CI - Low...95`
## • `95% CI - High` -> `95% CI - High...96`
## • `Quartile` -> `Quartile...97`
## • `Quartile` -> `Quartile...100`
## • `95% CI - Low` -> `95% CI - Low...104`
## • `95% CI - High` -> `95% CI - High...105`
## • `Quartile` -> `Quartile...106`
## • `Quartile` -> `Quartile...110`
## • `95% CI - Low` -> `95% CI - Low...113`
## • `95% CI - High` -> `95% CI - High...114`
## • `Quartile` -> `Quartile...115`
## • `Quartile` -> `Quartile...119`
## • `95% CI - Low` -> `95% CI - Low...123`
## • `95% CI - High` -> `95% CI - High...124`
## • `Quartile` -> `Quartile...125`
## • `Quartile` -> `Quartile...128`
## • `Quartile` -> `Quartile...131`
## • `95% CI - Low` -> `95% CI - Low...134`
## • `95% CI - High` -> `95% CI - High...135`
## • `Quartile` -> `Quartile...136`
## • `Quartile` -> `Quartile...138`
## • `Quartile` -> `Quartile...140`
## • `95% CI - Low` -> `95% CI - Low...143`
## • `95% CI - High` -> `95% CI - High...144`
## • `Quartile` -> `Quartile...145`
## • `95% CI - Low` -> `95% CI - Low...149`
## • `95% CI - High` -> `95% CI - High...150`
## • `Quartile` -> `Quartile...151`
## • `95% CI - Low` -> `95% CI - Low...154`
## • `95% CI - High` -> `95% CI - High...155`
## • `Quartile` -> `Quartile...156`
## Warning: Expecting logical in CY2096 / R2096C103: got '*'
## Warning: Expecting logical in CY2125 / R2125C103: got '*'
## Warning: Expecting logical in CY3145 / R3145C103: got '* An error was found in this data, and corrected data is not currently available.'
## New names:
## New names:
## • `Quartile` -> `Quartile...7`
## • `95% CI - Low` -> `95% CI - Low...12`
## • `95% CI - High` -> `95% CI - High...13`
## • `Quartile` -> `Quartile...14`
## • `95% CI - Low` -> `95% CI - Low...16`
## • `95% CI - High` -> `95% CI - High...17`
## • `Quartile` -> `Quartile...18`
## • `95% CI - Low` -> `95% CI - Low...20`
## • `95% CI - High` -> `95% CI - High...21`
## • `Quartile` -> `Quartile...22`
## • `95% CI - Low` -> `95% CI - Low...25`
## • `95% CI - High` -> `95% CI - High...26`
## • `Quartile` -> `Quartile...27`
## • `95% CI - Low` -> `95% CI - Low...32`
## • `95% CI - High` -> `95% CI - High...33`
## • `Quartile` -> `Quartile...34`
## • `95% CI - Low` -> `95% CI - Low...36`
## • `95% CI - High` -> `95% CI - High...37`
## • `Quartile` -> `Quartile...38`
## • `Quartile` -> `Quartile...40`
## • `95% CI - Low` -> `95% CI - Low...42`
## • `95% CI - High` -> `95% CI - High...43`
## • `Quartile` -> `Quartile...44`
## • `Quartile` -> `Quartile...46`
## • `95% CI - Low` -> `95% CI - Low...48`
## • `95% CI - High` -> `95% CI - High...49`
## • `Quartile` -> `Quartile...50`
## • `95% CI - Low` -> `95% CI - Low...54`
## • `95% CI - High` -> `95% CI - High...55`
## • `Quartile` -> `Quartile...56`
## • `Quartile` -> `Quartile...59`
## • `95% CI - Low` -> `95% CI - Low...61`
## • `95% CI - High` -> `95% CI - High...62`
## • `Quartile` -> `Quartile...63`
## • `95% CI - Low` -> `95% CI - Low...69`
## • `95% CI - High` -> `95% CI - High...70`
## • `Quartile` -> `Quartile...71`
## • `Quartile` -> `Quartile...75`
## • `Quartile` -> `Quartile...79`
## • `Quartile` -> `Quartile...83`
## • `# Medicare Enrollees` -> `# Medicare Enrollees...84`
## • `95% CI - Low` -> `95% CI - Low...86`
## • `95% CI - High` -> `95% CI - High...87`
## • `Quartile` -> `Quartile...88`
## • `95% CI - Low` -> `95% CI - Low...91`
## • `95% CI - High` -> `95% CI - High...92`
## • `Quartile` -> `Quartile...93`
## • `# Medicare Enrollees` -> `# Medicare Enrollees...96`
## • `95% CI - Low` -> `95% CI - Low...98`
## • `95% CI - High` -> `95% CI - High...99`
## • `Quartile` -> `Quartile...100`
## • `Quartile` -> `Quartile...106`
## • `95% CI - Low` -> `95% CI - Low...110`
## • `95% CI - High` -> `95% CI - High...111`
## • `Quartile` -> `Quartile...112`
## • `Quartile` -> `Quartile...116`
## • `95% CI - Low` -> `95% CI - Low...118`
## • `95% CI - High` -> `95% CI - High...119`
## • `Quartile` -> `Quartile...120`
## • `Quartile` -> `Quartile...127`
## • `95% CI - Low` -> `95% CI - Low...131`
## • `95% CI - High` -> `95% CI - High...132`
## • `Quartile` -> `Quartile...133`
## • `Quartile` -> `Quartile...136`
## • `Quartile` -> `Quartile...139`
## • `95% CI - Low` -> `95% CI - Low...142`
## • `95% CI - High` -> `95% CI - High...143`
## • `Quartile` -> `Quartile...144`
## • `Quartile` -> `Quartile...146`
## • `Quartile` -> `Quartile...148`
## • `95% CI - Low` -> `95% CI - Low...151`
## • `95% CI - High` -> `95% CI - High...152`
## • `Quartile` -> `Quartile...153`
## • `95% CI - Low` -> `95% CI - Low...155`
## • `95% CI - High` -> `95% CI - High...156`
## • `Quartile` -> `Quartile...157`
## • `95% CI - Low` -> `95% CI - Low...163`
## • `95% CI - High` -> `95% CI - High...164`
## • `Quartile` -> `Quartile...165`
RMD$year=as.integer(RMD$year)
RMD=RMD %>% filter(substring(FIPS, 3,5)!="000")
RMD=RMD %>% drop_na(FIPS)
RMDwide= pivot_wider(RMD, id_cols = FIPS, names_from = year, values_from = c(YPLLRateLow, YPLLRateHigh))
RMDwide$range15=(RMDwide$YPLLRateHigh_2015 - RMDwide$YPLLRateLow_2015)/(4*1.96)
RMDwide$irange15=1/RMDwide$range15
RMDwide$weight15=RMDwide$irange15 / sum(RMDwide$irange15, na.rm = TRUE)
sum(RMDwide$weight15 , na.rm = TRUE)
## [1] 1
RMDwide$range16=(RMDwide$YPLLRateHigh_2016 - RMDwide$YPLLRateLow_2016)/(4*1.96)
RMDwide$irange16=1/RMDwide$range16
RMDwide$weight16=RMDwide$irange16 / sum(RMDwide$irange16, na.rm = TRUE)
sum(RMDwide$weight16, na.rm = TRUE)
## [1] 1
RMDwide$range17=(RMDwide$YPLLRateHigh_2017 - RMDwide$YPLLRateLow_2017)/(4*1.96)
RMDwide$irange17=1/RMDwide$range17
RMDwide$weight17=RMDwide$irange17 / sum(RMDwide$irange17, na.rm = TRUE)
sum(RMDwide$weight17, na.rm = TRUE)
## [1] 1
RMDwide$range18=(RMDwide$YPLLRateHigh_2018 - RMDwide$YPLLRateLow_2018)/(4*1.96)
RMDwide$irange18=1/RMDwide$range18
RMDwide$weight18=RMDwide$irange18 / sum(RMDwide$irange18, na.rm = TRUE)
sum(RMDwide$weight18, na.rm = TRUE)
## [1] 1
RMDwide$range19=(RMDwide$YPLLRateHigh_2019 - RMDwide$YPLLRateLow_2019)/(4*1.96)
RMDwide$irange19=1/RMDwide$range19
RMDwide$weight19=RMDwide$irange19 / sum(RMDwide$irange19, na.rm = TRUE)
sum(RMDwide$weight19, na.rm = TRUE)
## [1] 1
weight=RMDwide[c(1, 14, 17, 20, 23, 26)]
weight[is.na(weight)] = 0
weight$averweight=dim(weight)[1]*(weight$weight15 + weight$weight16 + weight$weight17 + weight$weight18 + weight$weight19)/5
weight=weight[c(1, 7)]
save(weight, file = "weight.RData")
Perform merges:
## Warning: One or more parsing issues, see `problems()` for details
Tune xgbTree:
load(file = "YPLLanalytic.Rdata")
nc = parallel::detectCores()
cl = makePSOCKcluster(nc-1) # Set number of cores equal to machine number minus one
registerDoParallel(cl) #Set up parallel
fitControl = trainControl(method = "repeatedcv",
number = 10,
repeats = 10,
allowParallel=TRUE
)
#Step 1: find range of nrounds associated with different learning rates with non-stochastic trees, default max_depth 6, min_child_weight 20
# xgbGrid1 = expand.grid(nrounds = c(1,5,10,20,50,100,150,200,250,300,350,400,450,500,550,600,650,700,750,800,850,900,950,1000,1100,1200,1300,1400,1500),
# max_depth = 6,
# eta = c(.0025,.005,.01,.02,.04,.08,.16,.32),
# gamma = 0,
# colsample_bytree = 1,
# min_child_weight = 20,
# subsample = 1)
#
# set.seed (112358)
# xgbFit1 = train(YPLLdif ~ ., data = YPLLanalytic,
# method = "xgbTree",
# trControl = fitControl,
# tuneGrid = xgbGrid1,
# weights = analyticwrace$averweight,
# nthread=1,
# verbosity = 0
# )
#
# plot(xgbFit1)
# xgbFit1$bestTune
#
# #Step 2: We've ruled out fast learning rates. .005 and .01 are approximately the same. Next, allow for stochastic variation and see if range still works
#
# xgbGrid2 = expand.grid(nrounds = seq(100,1000,100),
# max_depth = 6,
# eta = c(.005,.01),
# gamma = 0,
# colsample_bytree = c(.2,.5,.8,1),
# min_child_weight = 20,
# subsample = c(.2,.5,.8,1))
#
# set.seed (112358)
# xgbFit2 = train(YPLLdif ~ ., data = YPLLanalytic,
# method = "xgbTree",
# trControl = fitControl,
# tuneGrid = xgbGrid2,
# weights = analyticwrace$averweight,
# nthread=1,
# verbosity = 0
# )
#
# plot(xgbFit2)
# xgbFit2$bestTune
#
# #Step 3: eta = .005 confirmed around 600 trees; approx subsample .2, colsample .5; now vary depth
#
# xgbGrid3 = expand.grid(nrounds = seq(400,1200,50),
# max_depth = c(3,4,5,6,7,8,9),
# eta = .005,
# gamma = 0,
# colsample_bytree = c(.4,.5,.6,.7),
# min_child_weight = 20,
# subsample = c(.1,.2,.3))
#
# set.seed (112358)
# xgbFit3 = train(YPLLdif ~ ., data = YPLLanalytic,
# method = "xgbTree",
# trControl = fitControl,
# tuneGrid = xgbGrid3,
# weights = analyticwrace$averweight,
# nthread=1,
# verbosity = 0
# )
#
# plot(xgbFit3)
# xgbFit3$bestTune
#Step 4: final fine-tuning / subsample .2, colsample .5, depth 7, around 600 trees
xgbGridFinal = expand.grid(nrounds = seq(500,700,2),
max_depth = 7,
eta = .005,
gamma = 0,
colsample_bytree = .5,
min_child_weight = 20,
subsample = .2)
set.seed (112358)
xgbFitFinal = train(YPLLdif ~ ., data = YPLLanalytic,
method = "xgbTree",
trControl = fitControl,
tuneGrid = xgbGridFinal,
weights = analyticwrace$averweight,
nthread=1,
verbosity = 0
)
plot(xgbFitFinal)
xgbFitFinal$bestTune
## nrounds max_depth eta gamma colsample_bytree min_child_weight subsample
## 39 576 7 0.005 0 0.5 20 0.2
stopCluster(cl)
save(xgbFitFinal,file="xgbFitFinal.Rdata")
Results:
load(file="YPLLanalytic.Rdata")
load(file = "xgbFitFinal.Rdata")
plot(varImp(xgbFitFinal))
vi=varImp(xgbFitFinal)
vi$importance
## Overall
## obesitya 100.000000
## inactivitya 88.772228
## somecollegea 88.168302
## PCPRateb 60.578163
## inactivityb 58.391528
## childpovertya 57.927225
## hpnon 57.030086
## highschoola 55.004340
## uninsureda 53.720164
## highschoolb 50.112562
## PCPRatea 49.386681
## unemploymenta 46.582051
## somecollegeb 43.606689
## blwt 39.858873
## obesityb 39.850189
## childpovertyb 19.422004
## uninsuredb 8.052255
## unemploymentb 0.000000
xgbFitFinal
## eXtreme Gradient Boosting
##
## 2509 samples
## 18 predictor
##
## No pre-processing
## Resampling: Cross-Validated (10 fold, repeated 10 times)
## Summary of sample sizes: 2259, 2258, 2258, 2258, 2259, 2258, ...
## Resampling results across tuning parameters:
##
## nrounds RMSE Rsquared MAE
## 500 1089.851 0.02950927 779.6507
## 502 1089.824 0.02953262 779.6333
## 504 1089.795 0.02954632 779.6084
## 506 1089.777 0.02955068 779.5970
## 508 1089.726 0.02960608 779.5757
## 510 1089.728 0.02955611 779.5796
## 512 1089.709 0.02956211 779.5730
## 514 1089.687 0.02957507 779.5708
## 516 1089.670 0.02957168 779.5568
## 518 1089.671 0.02955039 779.5548
## 520 1089.661 0.02953098 779.5451
## 522 1089.635 0.02955591 779.5312
## 524 1089.611 0.02957802 779.5247
## 526 1089.563 0.02963837 779.4881
## 528 1089.562 0.02961942 779.4907
## 530 1089.548 0.02963361 779.4794
## 532 1089.526 0.02964628 779.4655
## 534 1089.499 0.02965325 779.4498
## 536 1089.471 0.02968219 779.4234
## 538 1089.476 0.02965969 779.4225
## 540 1089.459 0.02968307 779.4011
## 542 1089.448 0.02970502 779.4051
## 544 1089.433 0.02971258 779.3936
## 546 1089.453 0.02965811 779.4043
## 548 1089.409 0.02971190 779.3666
## 550 1089.399 0.02970951 779.3552
## 552 1089.363 0.02975816 779.3274
## 554 1089.370 0.02973438 779.3402
## 556 1089.356 0.02974686 779.3198
## 558 1089.325 0.02979144 779.2891
## 560 1089.318 0.02978840 779.2858
## 562 1089.313 0.02978528 779.2697
## 564 1089.297 0.02979120 779.2574
## 566 1089.269 0.02983674 779.2354
## 568 1089.269 0.02983018 779.2329
## 570 1089.248 0.02984857 779.2270
## 572 1089.228 0.02986549 779.2204
## 574 1089.206 0.02989790 779.1958
## 576 1089.177 0.02993063 779.1761
## 578 1089.191 0.02990882 779.1823
## 580 1089.193 0.02990012 779.1823
## 582 1089.207 0.02985788 779.2069
## 584 1089.210 0.02983693 779.2141
## 586 1089.198 0.02987002 779.2167
## 588 1089.205 0.02985623 779.2309
## 590 1089.216 0.02983025 779.2398
## 592 1089.216 0.02982504 779.2428
## 594 1089.203 0.02983931 779.2392
## 596 1089.207 0.02982265 779.2540
## 598 1089.201 0.02982628 779.2407
## 600 1089.214 0.02980098 779.2501
## 602 1089.235 0.02976431 779.2631
## 604 1089.264 0.02970339 779.2806
## 606 1089.252 0.02972357 779.2840
## 608 1089.253 0.02971334 779.2787
## 610 1089.263 0.02969112 779.2887
## 612 1089.240 0.02971430 779.2803
## 614 1089.248 0.02969926 779.2713
## 616 1089.237 0.02971580 779.2594
## 618 1089.236 0.02971987 779.2547
## 620 1089.252 0.02968986 779.2692
## 622 1089.266 0.02966190 779.2846
## 624 1089.268 0.02966561 779.2881
## 626 1089.277 0.02965476 779.2996
## 628 1089.257 0.02969646 779.3043
## 630 1089.246 0.02970701 779.2985
## 632 1089.260 0.02967930 779.3208
## 634 1089.298 0.02962410 779.3465
## 636 1089.303 0.02960992 779.3552
## 638 1089.296 0.02962982 779.3417
## 640 1089.265 0.02967679 779.3261
## 642 1089.271 0.02966479 779.3428
## 644 1089.289 0.02966576 779.3714
## 646 1089.293 0.02966442 779.3724
## 648 1089.306 0.02966532 779.3708
## 650 1089.316 0.02965095 779.3774
## 652 1089.299 0.02969356 779.3580
## 654 1089.306 0.02969807 779.3554
## 656 1089.323 0.02966414 779.3681
## 658 1089.330 0.02966562 779.3846
## 660 1089.361 0.02961481 779.4151
## 662 1089.361 0.02961098 779.4247
## 664 1089.366 0.02960398 779.4407
## 666 1089.353 0.02962163 779.4277
## 668 1089.346 0.02963284 779.4290
## 670 1089.333 0.02964888 779.4244
## 672 1089.342 0.02963171 779.4353
## 674 1089.319 0.02967121 779.4352
## 676 1089.338 0.02963037 779.4488
## 678 1089.320 0.02964777 779.4368
## 680 1089.316 0.02965525 779.4337
## 682 1089.338 0.02962640 779.4484
## 684 1089.357 0.02959830 779.4575
## 686 1089.359 0.02960974 779.4652
## 688 1089.365 0.02960742 779.4639
## 690 1089.365 0.02961497 779.4588
## 692 1089.380 0.02959577 779.4729
## 694 1089.391 0.02959433 779.4730
## 696 1089.397 0.02959123 779.4883
## 698 1089.396 0.02960269 779.4876
## 700 1089.404 0.02958937 779.4925
##
## Tuning parameter 'max_depth' was held constant at a value of 7
## Tuning parameter 'eta' was held constant at a value of 0.005
## Tuning parameter 'gamma' was held constant at a value of 0
## Tuning parameter 'colsample_bytree' was held constant at a value of 0.5
## Tuning parameter 'min_child_weight' was
## held constant at a value of 20
## Tuning parameter 'subsample' was held constant at a value of 0.2
## RMSE was used to select the optimal model using the smallest value.
## The final values used for the model were nrounds = 576, max_depth = 7, eta = 0.005, gamma = 0, colsample_bytree = 0.5, min_child_weight = 20 and subsample = 0.2.
xgb.pdp = list()
res.partialplot = list()
predvarls = rownames(varImp(xgbFitFinal)$importance)
for (m in 1:length(predvarls)){
xgb.pdp[[m]] =
partial(
object = xgbFitFinal,
pred.var = predvarls[[m]],
plot = FALSE,
chull = TRUE,
plot.engine = "ggplot2"
)
res.partialplot[[m]] = plotPartial(xgb.pdp[[m]], rug =TRUE, train = YPLLanalytic, ylim=c(99, 601))
}
for(j in 1:length(predvarls)){
print(res.partialplot[[j]])
}
library(sfsmisc)
##
## Attaching package: 'sfsmisc'
## The following object is masked from 'package:Hmisc':
##
## errbar
## The following object is masked from 'package:dplyr':
##
## last
library(usmap)
res_e = numeric(length = length(predvarls))
res_semi_e = numeric(length = length(predvarls))
county_e = matrix(data = NA,nrow = dim(YPLLanalytic)[1],ncol = length(predvarls))
res.elastplot = list()
res.semi_elastplot = list()
goodlist = c("somecollegea","somecollegeb","PCPratea","PCPrateb","highschoola","highschoolb") #Variables where increase is hypothesized to reduce YPLLdif
for (j in 1:length(predvarls)) {
sm_pdp_j = smooth.spline(x = xgb.pdp[[j]][, 1], y = xgb.pdp[[j]][, 2], df = 5) #Smooth partial dependency of YPLL on predictor j
d_pdp_d_j = D1tr(y = sm_pdp_j$y, x = sm_pdp_j$x) #Derivative of smoothed partial dependency of YPLL on predictor j
e_pdp_j = (sm_pdp_j$x / sm_pdp_j$y) * d_pdp_d_j #Elasticity of YPLL with respect to predictor j
plot(
sm_pdp_j$x,
e_pdp_j,
xlab = predvarls[j],
ylab = "Elasticity of YPLLdif",
ylim = c(-1, 1)
)
interp.elast = approxfun(x = sm_pdp_j$x, y = e_pdp_j)
res_e[j] = weighted.mean(x = interp.elast(YPLLanalytic[, predvarls[j]]), w =
analyticwrace$averweight)
county_e[, j] = interp.elast(YPLLanalytic[, predvarls[j]])
etest = data.frame(fips = analyticwrace$FIPS, elast = county_e[, j])
# if (predvarls[j] %in% goodlist) {
# semi_e_j = -county_e[, j] * abs(YPLLanalytic$YPLLdif)
# } else semi_e_j = county_e[, j] * abs(YPLLanalytic$YPLLdif)
semi_e_j = county_e[, j] * abs(YPLLanalytic$YPLLdif)
res_semi_e[j]=weighted.mean(x = county_e[, j] * abs(YPLLanalytic$YPLLdif), w =
analyticwrace$averweight)
semi_etest = data.frame(fips = analyticwrace$FIPS, semi_elast = semi_e_j)
res.elastplot[[j]] = plot_usmap(regions = "counties",
data = etest,
values = "elast") + scale_fill_gradientn(
colours = c("darkgreen", "green", "white", "red", "darkred"),
na.value = "gray",
breaks = c(-1,-.5, 0, .5, 1),
labels = c(-1,-.5, 0, .5, 1),
limits = c(-1.2, 1.2)
) + ggtitle(predvarls[j])
print(res.elastplot[[j]])
res.semi_elastplot[[j]] = plot_usmap(regions = "counties",
data = semi_etest,
values = "semi_elast") + scale_fill_gradientn(
colours = c("darkgreen", "green", "white", "red", "darkred"),
na.value = "gray",
breaks = c(-1500,-750,-375, 0, 375, 750, 1500),
labels = c(-1500,-750,-375, 0, 375, 750, 1500),
limits = c(-3000, 3000)
)+ ggtitle(predvarls[j])
print(res.semi_elastplot[[j]])
}
## Warning: Ignoring unknown parameters: linewidth
## Warning: Ignoring unknown parameters: linewidth
## Warning: Ignoring unknown parameters: linewidth
## Warning: Ignoring unknown parameters: linewidth
## Warning: Ignoring unknown parameters: linewidth
## Warning: Ignoring unknown parameters: linewidth
## Warning: Ignoring unknown parameters: linewidth
## Warning: Ignoring unknown parameters: linewidth
## Warning: Ignoring unknown parameters: linewidth
## Warning: Ignoring unknown parameters: linewidth
## Warning: Ignoring unknown parameters: linewidth
## Warning: Ignoring unknown parameters: linewidth
## Warning: Ignoring unknown parameters: linewidth
## Warning: Ignoring unknown parameters: linewidth
## Warning: Ignoring unknown parameters: linewidth
## Warning: Ignoring unknown parameters: linewidth
## Warning: Ignoring unknown parameters: linewidth
## Warning: Ignoring unknown parameters: linewidth
## Warning: Ignoring unknown parameters: linewidth
## Warning: Ignoring unknown parameters: linewidth
## Warning: Ignoring unknown parameters: linewidth
## Warning: Ignoring unknown parameters: linewidth
## Warning: Ignoring unknown parameters: linewidth
## Warning: Ignoring unknown parameters: linewidth
## Warning: Ignoring unknown parameters: linewidth
## Warning: Ignoring unknown parameters: linewidth
## Warning: Ignoring unknown parameters: linewidth
## Warning: Ignoring unknown parameters: linewidth
## Warning: Ignoring unknown parameters: linewidth
## Warning: Ignoring unknown parameters: linewidth
## Warning: Ignoring unknown parameters: linewidth
## Warning: Ignoring unknown parameters: linewidth
## Warning: Ignoring unknown parameters: linewidth
## Warning: Ignoring unknown parameters: linewidth
## Warning: Ignoring unknown parameters: linewidth
## Warning: Ignoring unknown parameters: linewidth
print(cbind(predvarls,res_e)) #Overall elasticity results
## predvarls res_e
## [1,] "obesitya" "0.722628703573935"
## [2,] "inactivitya" "0.370187562607025"
## [3,] "somecollegea" "-0.259859798699842"
## [4,] "PCPRateb" "-0.00423301209769754"
## [5,] "inactivityb" "-0.00427052706075375"
## [6,] "childpovertya" "0.187478719322619"
## [7,] "hpnon" "0.0201177261337519"
## [8,] "highschoola" "-0.0400321326226932"
## [9,] "uninsureda" "-0.239355491732303"
## [10,] "highschoolb" "-0.0112087380758103"
## [11,] "PCPRatea" "0.0399755131094835"
## [12,] "unemploymenta" "0.277642883561138"
## [13,] "somecollegeb" "-0.0526149802345273"
## [14,] "blwt" "-0.00704662253025246"
## [15,] "obesityb" "0.037059867239215"
## [16,] "childpovertyb" "-0.0104389801824501"
## [17,] "uninsuredb" "0.00380738793305452"
## [18,] "unemploymentb" "0.0150438495678506"
print(cbind(predvarls,res_semi_e)) #semi elasticity results
## predvarls res_semi_e
## [1,] "obesitya" "522.65128400551"
## [2,] "inactivitya" "241.663163605211"
## [3,] "somecollegea" "-138.658803621263"
## [4,] "PCPRateb" "-2.43694527315993"
## [5,] "inactivityb" "-2.06347207566088"
## [6,] "childpovertya" "132.046718025043"
## [7,] "hpnon" "15.6972640274671"
## [8,] "highschoola" "-27.1175971504716"
## [9,] "uninsureda" "-166.978743556677"
## [10,] "highschoolb" "-7.51616137709872"
## [11,] "PCPRatea" "24.2266195080033"
## [12,] "unemploymenta" "183.322017382117"
## [13,] "somecollegeb" "-42.7653873332059"
## [14,] "blwt" "-7.71637768114674"
## [15,] "obesityb" "28.4553443468538"
## [16,] "childpovertyb" "-7.34847169099488"
## [17,] "uninsuredb" "2.25357146884707"
## [18,] "unemploymentb" "9.94443943601385"